###爬取微博热搜
#导入外部模块
import requests
import csv
import os
import re
import parsel
from datetime import datetime
# Script body: fetch the Weibo real-time hot-search page, extract one row per
# table entry (rank, topic text, heat value) and write them to a timestamped
# CSV file under the local "csv" directory.

# Timestamp used to make the output file name unique per run
time = datetime.now().strftime("%Y年%m月%d日%H时%M分%S秒")
# Output file path; make sure the parent directory exists
file_path = os.path.join('csv', f'微博热搜_{time}.csv')
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Target URL and request headers
# NOTE(review): the cookie below is a hard-coded session value committed to
# source; it will expire and should probably be supplied via configuration.
url = 'https://s.weibo.com/top/summary?cate=realtimehot'
headers = {
    'user-agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
    'cookie' : 'SUB=_2AkMQZSGdf8NxqwFRmf4XzWrrboVwzgjEieKmOdBGJRMxHRl-yT9kqh1TtRB6O-UPctIs6Sw5AWUPA1W8dl51Cdne0Fd0; SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9WW4ffjDlKq7D4o3HZFxyBNy; _s_tentry=weibo.com; appkey=; Apache=4058370872730.6436.1731833570994; SINAGLOBAL=4058370872730.6436.1731833570994; ULV=1731833570996:1:1:1:4058370872730.6436.1731833570994:; PC_TOKEN=79e9f0afee; UOR=,,cn.bing.com; _dd_s=logs=1&id=cae4a1b5-6738-4a26-958d-1ddeb8dd52af&created=1731833842778&expire=1731834760799',
    'referer' : 'https://s.weibo.com/top/summary?cate=entrank'
}

# Send the request before creating the output file, so a failed request does
# not leave a header-only CSV behind. The timeout prevents hanging forever,
# and raise_for_status() surfaces HTTP errors instead of parsing an error page.
res = requests.get(url=url, headers=headers, timeout=10)
res.raise_for_status()

# Build a parsel selector and pick every row of the hot-search table
selector = parsel.Selector(res.text)
trs = selector.css('#pl_top_realtimehot tbody tr')

# The context manager guarantees the file is closed even if a row fails.
# newline='' is what the csv module expects for files it writes to.
with open(file_path, mode='a', encoding='utf-8-sig', newline='') as f:
    csv_writer = csv.DictWriter(f, fieldnames=[
        '序号', '热搜内容', '热度'
    ])
    # Header row
    csv_writer.writeheader()

    # index is the 1-based running row number
    for index, tr in enumerate(trs, start=1):
        # Topic text
        content = tr.css('.td-02 a::text').get()
        # Raw heat text; .get() returns None when the row has no <span>
        hot_text = tr.css('.td-02 span::text').get()
        # Keep only the first run of digits; None when absent
        match = re.search(r'\d+', hot_text) if hot_text is not None else None
        hot = int(match.group()) if match else None

        # Write one CSV row
        csv_writer.writerow({
            '序号': index,
            '热搜内容': content,
            '热度': hot,
        })

        print(index, content, hot)